##### The following script performs the following four crucial operations:
## 1. Subsets respondents who have observed values on all individual-level variables (i.e. listwise deletes the ESS data)
## 2. Centers all individual-level predictors within country-years/country-rounds
## 3. Decomposes the effects of the four contextual-level variables into cross-sectional effect (BE) and longitudinal effect (WE)
## 4. Creates welfare regime country-level variables


########## 1. MISSING DATA: listwise deletion
### Subsetting only respondents who have valid values on all individual-level variables entering the subsequent substantive analyses
# contextual variables are entered here in their original format (i.e. they are not centered within country-years)
ess_context$compl <- !is.na(ess_context$red_sup) & !is.na(ess_context$hincfel) & !is.na(ess_context$employed) & !is.na(ess_context$unemployed) & 
  !is.na(ess_context$female) & !is.na(ess_context$age) & !is.na(ess_context$isced2) & !is.na(ess_context$isced3) & !is.na(ess_context$isced4) & 
  !is.na(ess_context$isced56) & !is.na(ess_context$lrscale) & !is.na(ess_context$stfeco) 

# creating a "listwise deletion" (_ld) data frame that contains only cases that have observed values on all variables
ess_4_ld <- ess_context[ess_context$compl == TRUE, ]
# After excluding the missing data at the individual level, the data set contains 319752 respondents. These respondents enter all the analyses reported in the article.
dim(ess_4_ld)
setDT(ess_4_ld)

# how many country-rounds are available (202)
length(table(ess_4_ld$cntry_round))

### France round 1 and France round 2 were excluded (because the subjective income variable is not available in these two rounds)
table(ess_4_ld$cntry, ess_4_ld$essround)

# All contextual variables are available for all country rounds
table(is.na(ess_4_ld$LFPR_y0))
table(is.na(ess_4_ld$SOCEXP_y0))
table(is.na(ess_4_ld$GDP_y0))
table(is.na(ess_4_ld$gini_disp_y0))



########## 2. Centering individual-level predictors
### PROGRAMMING TWO FUNCTIONS FOR GROUP-MEAN CENTERING (at the individual level)
########## ##### GROUP-MEAN CENTERING FUNCTIONS
center <- function(x) {
  if(!is.numeric(x)) {
    return(x) # if not numeric return the variable
  } else { # if numeric center
    x_mean <- mean(x, na.rm = TRUE)
    x_center <- x - x_mean
    return(x_center)
  }
}

center_df <- function(df, col_center, group) {
  require(data.table)
  df <- as.data.table(df)
  
  newcols <- paste0(col_center, "_CWC")
  
  df_center <- df[, c(newcols) := lapply(.SD, center),
                  by = eval(group), 
                  .SDcols = col_center]
  
  return(as.data.frame(df_center))
}
##### GROUP-MEAN CENTERING FUNCTIONS


### Creating three dummy variables for the perceived income variable (hincfel)
table(ess_4_ld$hincfel)

# hinc_2_cop = 2 Coping on present income
ess_4_ld$hinc_2_cop <- ifelse(ess_4_ld$hincfel == 2, 1, 0)
table(ess_4_ld$hincfel, ess_4_ld$hinc_2_cop)

# hinc_3_dif = 3 Difficult on present income
ess_4_ld$hinc_3_dif <- ifelse(ess_4_ld$hincfel == 3, 1, 0)
table(ess_4_ld$hincfel, ess_4_ld$hinc_3_dif)

# hinc_4_vdif = 4 Very difficult on present income
ess_4_ld$hinc_4_vdif <- ifelse(ess_4_ld$hincfel == 4, 1, 0)
table(ess_4_ld$hincfel, ess_4_ld$hinc_4_vdif)


##### Group-mean centering (CWC) the selected individual level predictor variables (at the country-round level)
dim(ess_4_ld)
ess_4_ld <- center_df(df = ess_4_ld, col_center = c("employed", "unemployed", "hinc_2_cop", "hinc_3_dif", "hinc_4_vdif", 
                                                    "female", "age", "isced2", "isced3", "isced4", "isced56", "stfeco" ,"lrscale"), group = "cntry_round")



########## 3. Decomposing the effects of contextual-level variables into BEs and WEs
##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
##### CONTEXTUAL VARIABLES (202 country-rounds) ##### ##### ##### ##### #####
##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
# this script checks how many unique values there are - maximum number CANNOT exceed the number of country rounds (e.g. 202 country-rounds)
context_names <- c("LFPR_y0", "SOCEXP_y0", "GDP_y0", "gini_disp_y0")
for(i in 1:length(context_names)){
  unique_cr <- uniqueN(ess_4_ld, by = context_names[i])
  print(unique_cr)
}


# creating a matrix with contextual variables exported from the "ess_4_ld"
setDT(ess_4_ld)
ess_4_context <- ess_4_ld[, .(LFPR_y0 = mean(LFPR_y0), SOCEXP_y0 = mean(SOCEXP_y0), GDP_y0 = mean(GDP_y0), gini_disp_y0 = mean(gini_disp_y0)), by = c("cntry", "essround")]
for(i in 1:length(context_names)){
  unique_cr <- uniqueN(ess_4_context, by = context_names[i])
  print(unique_cr)
}


# computing the means of contextual variables within countries (to estimate the BE of contextual variables)
# these means are computed based on the above aggregated "ess_4_context" matrix
ess_4_context_means <- ess_4_context[, .(mean_LFPR_y0 = mean(LFPR_y0), mean_SOCEXP_y0 = mean(SOCEXP_y0), mean_GDP_y0 = mean(GDP_y0), 
                                         mean_gini_disp_y0 = mean(gini_disp_y0)), by = "cntry"]
# correlation matrix between contextual variables at the country level
cor(ess_4_context_means[, 2:5])

# merging the country means with the original contextual dataset (so that contextual variables could be centered within country-years)
ess_4_context <- ess_4_context_means[ess_4_context, on = "cntry"]


### Centering contextual variables within clusters (CWC)
ess_4_context$dev_mean_LFPR_y0 <- ess_4_context$LFPR_y0 - ess_4_context$mean_LFPR_y0
ess_4_context$dev_mean_SOCEXP_y0 <- ess_4_context$SOCEXP_y0 - ess_4_context$mean_SOCEXP_y0
ess_4_context$dev_mean_GDP_y0 <- ess_4_context$GDP_y0 - ess_4_context$mean_GDP_y0
ess_4_context$dev_mean_gini_disp_y0 <- ess_4_context$gini_disp_y0 - ess_4_context$mean_gini_disp_y0

# Deleting the four variables that are already in ess_4_ld data. frame
ess_4_context[, c("LFPR_y0", "SOCEXP_y0", "GDP_y0", "gini_disp_y0") := NULL]

# Computing correlations between respective pairs of variables (to check whether the BE and WE is orthogonal)
cor(ess_4_context$dev_mean_LFPR_y0, ess_4_context$mean_LFPR_y0)
cor(ess_4_context$dev_mean_SOCEXP_y0, ess_4_context$mean_SOCEXP_y0)
cor(ess_4_context$dev_mean_GDP_y0, ess_4_context$mean_GDP_y0)
cor(ess_4_context$dev_mean_gini_disp_y0, ess_4_context$mean_gini_disp_y0)



### Merging the decomposed contextual-level variables with the already prepared individual-level ess_4_ld data.frame
# Multilevel models will be computed on the resulting data.frame "ess_4_ld_models"
ess_4_ld_models <- ess_4_context[ess_4_ld, on = .(cntry, essround)]

# Deleting the "listwise deletion" (_ld) - since it does not have the prepared contextual variables and is therefore just a duplicate
rm(ess_4_ld)
rm(context_names, i, unique_cr, x)



########## 4. WELFARE REGIME VARIABLES
table(as.character(ess_4_ld_models$cntry))

# The welfare regime variable is be based on country classification obtained from Deeming and Jones 2015 article (DOI: 10.1080/00207659.2015.1098175)
ess_4_ld_models$welfare_regime <- rep(NA, nrow(ess_4_ld_models))

# Social-democratic regime = 5 countries (Denmark, Norway, Finland, Sweden, Iceland)
ess_4_ld_models$welfare_regime[ess_4_ld_models$cntry == "DK" | ess_4_ld_models$cntry == "NO" | ess_4_ld_models$cntry == "FI" |
                                 ess_4_ld_models$cntry == "SE" | ess_4_ld_models$cntry == "IS"] <- "Social-democratic"

# Conservative regime = 6 countries (Austria, Belgium, Germany, France, Switzerland, Netherlands)
ess_4_ld_models$welfare_regime[ess_4_ld_models$cntry == "AT" | ess_4_ld_models$cntry == "BE" | ess_4_ld_models$cntry == "DE" |
                                 ess_4_ld_models$cntry == "FR" | ess_4_ld_models$cntry == "CH" | ess_4_ld_models$cntry == "NL"] <- "Conservative"

# Liberal regime = 2 countries (United Kingdom, Ireland)
ess_4_ld_models$welfare_regime[ess_4_ld_models$cntry == "GB" | ess_4_ld_models$cntry == "IE"] <- "Liberal"

# Post-communist regime = 8 countries (Bulgaria, Czech Republic, Estonia, Hungary, Lithuania, Poland, Slovenia, Slovakia)
ess_4_ld_models$welfare_regime[ess_4_ld_models$cntry == "BG" | ess_4_ld_models$cntry == "CZ" | ess_4_ld_models$cntry == "EE" |
                                 ess_4_ld_models$cntry == "HU" | ess_4_ld_models$cntry == "LT" | ess_4_ld_models$cntry == "PL" |
                                 ess_4_ld_models$cntry == "SI" | ess_4_ld_models$cntry == "SK" ] <- "Post-communist"

# Mediterranean regime = 6 countries (Cyprus, Spain, Greece, Israel, Italy, Portugal)
ess_4_ld_models$welfare_regime[ess_4_ld_models$cntry == "CY" | ess_4_ld_models$cntry == "ES" | ess_4_ld_models$cntry == "GR" | 
                                 ess_4_ld_models$cntry == "IL" | ess_4_ld_models$cntry == "IT" | ess_4_ld_models$cntry == "PT"] <- "Mediterranean"

table(ess_4_ld_models$welfare_regime)
table(as.character(ess_4_ld_models$cntry), ess_4_ld_models$welfare_regime)
table(is.na(ess_4_ld_models$welfare_regime))

# Computing the average redistribution support for each welfare regime
ess_4_ld_models[, mean(red_sup), by = "welfare_regime"]

# Creating a dummy variable for the liberal regime (as lowest redistribution support is expected for this regime based on the welfare regime thoery)
ess_4_ld_models$liberal <- ifelse(ess_4_ld_models$welfare_regime == "Liberal", yes = 1, no = 0)
table(ess_4_ld_models$cntry, ess_4_ld_models$liberal)

# Creating a dummy variable for the Mediterranean regime (as highest redistribution support is expected for this regime based on the welfare regime thoery)
ess_4_ld_models$mediter <- ifelse(ess_4_ld_models$welfare_regime == "Mediterranean", yes = 1, no = 0)
table(ess_4_ld_models$cntry, ess_4_ld_models$mediter)

